This is a draft vignette, showing a proposed workflow for applying NICHES to the IPF Atlas.

This document uses a downsampled version of the 2019 IPF Atlas.

If we like overall approach, we could try scaling-up to the integrated nucSeq & cell data.

Load Data

load("~/T5/Kaminski_Lab/control.down.Robj")
load("~/T5/Kaminski_Lab/ipf.down.Robj")

Inspect Data

table(Idents(control.down))
## 
##          ncMonocyte Macrophage_Alveolar                  NK           cMonocyte 
##                1058                3000                2744                3000 
##           Lymphatic          Macrophage          Fibroblast               Basal 
##                 973                3000                 847                 114 
##       Myofibroblast           Multiplet                cDC2                   B 
##                 204                1000                1364                1227 
##           VE_Venous                   T                ATII                 ATI 
##                 226                3000                2655                 502 
##      VE_Capillary_A         VE_Arterial            Ciliated                Club 
##                 192                 274                1144                 226 
##      VE_Capillary_B            B_Plasma               ILC_A         T_Cytotoxic 
##                 436                 232                 276                3000 
##              Goblet        T_Regulatory         Mesothelial               ILC_B 
##                 101                 316                  46                 137 
##                 SMC                cDC1       DC_Langerhans                Mast 
##                  69                 199                  18                 165 
##                 pDC           DC_Mature    VE_Peribronchial            Pericyte 
##                  52                 151                  83                  24 
##                PNEC            Ionocyte 
##                  17                   2
table(Idents(ipf.down))
## 
##          Macrophage            Ciliated           cMonocyte                   B 
##                3000                3000                3000                3000 
##                Club              Goblet           Multiplet Macrophage_Alveolar 
##                1855                1044                2765                3000 
##                   T          ncMonocyte                Mast          Fibroblast 
##                3000                1931                 572                1443 
##         T_Cytotoxic                cDC2               Basal                ATII 
##                3000                3000                1472                 496 
##        T_Regulatory    VE_Peribronchial   Aberrant_Basaloid                cDC1 
##                 844                1919                 448                 373 
##                  NK               ILC_B       Myofibroblast           VE_Venous 
##                3000                 169                2886                 430 
##       DC_Langerhans                 pDC            B_Plasma      VE_Capillary_B 
##                 282                 430                 192                 494 
##         VE_Arterial           Lymphatic           DC_Mature      VE_Capillary_A 
##                 330                 733                 364                 206 
##                 ATI            Ionocyte               ILC_A                 SMC 
##                 176                  23                 230                 556 
##         Mesothelial            Pericyte                PNEC 
##                 233                 578                  12

Merge together disease and control and check that idents are set correctly

merge <- merge(control.down,ipf.down)
#table(merge$Subject_Identity)
table(merge$Disease_Identity,merge$Subject_Identity)
##          
##           001C 002C 003C 010I 021I 022I 025I 034C 034I 040I 041I 051I 053I 063I
##   Control  789  148 1314    0    0    0    0  147    0    0    0    0    0    0
##   IPF        0    0    0 1676 1937 2384  705    0 2514 1533 1137 2092 1099  898
##          
##           065C 081C 084C 092C 098C 123I 133C 135I 1372C 137C 138I 145I 157I
##   Control  465  161  151  656 3844    0 3283    0  1814  329    0    0    0
##   IPF        0    0    0    0    0  694    0 1968     0    0 1527 1693 2509
##          
##           158I 160C 166I 174I 177I 179I 192C 208C 209I 210I 212I 214I 218C 221I
##   Control    0  980    0    0    0    0 1093  406    0    0    0    0 1172    0
##   IPF     2127    0 1447 2420 2141 1360    0    0 1312  748 1481  244    0 1601
##          
##           222C 222I 225I 226C 228I 244C 253C 296C  29I 388C 396C 439C 454C 465C
##   Control 4098    0    0 1951    0  138  468 1308    0  668  409 3216  621 1432
##   IPF        0 3166 3851    0  935    0    0    0  327    0    0    0    0    0
##          
##            47I 483C 484C  49I  59I
##   Control    0  483  530    0    0
##   IPF      622    0    0 1669  669
#table(merge$Manuscript_Identity) == table(Idents(merge))

Defining a color scheme for plotting

# Define colors
col.pal <- list()
col.pal$Disease <- c('orange','purple3')
col.pal$Disease <- c(brewer.pal(6,'Accent')[5:6])
col.pal$Disease <- c('darkorange','blue3')
names(col.pal$Disease) <- c('Control','IPF')
col.pal$Class <- c(brewer.pal(4,'Set1'))
names(col.pal$Class) <- c('Epithelium','Endothelium','Mesenchyme','Immune')
col.pal$Type <- c('firebrick','steelblue','springgreen','purple','salmon','skyblue','midnightblue','orangered','violetred',
                  'tomato','seashell','sandybrown','saddlebrown','royalblue','plum4',
                  'lightgoldenrod','lawngreen','forestgreen','dimgray','deeppink',
                  'red2','paleturquoise1','palevioletred','orchid4','seagreen2','plum1','olivedrab2',
                  'slateblue','mediumvioletred','sienna','orange','seagreen',
                  'lightseagreen','mediumpurple4','dodgerblue','goldenrod1','grey30','darkred')
names(col.pal$Type) <- c("Aberrant_Basaloid","ATI","ATII","Basal" ,"Ciliated","Club",  "Goblet","Ionocyte","PNEC",  
                         "B","B_Plasma","cDC1","cDC2","cMonocyte","DC_Langerhans","DC_Mature","ILC_A","ILC_B",
                         "Macrophage","Macrophage_Alveolar","Mast","ncMonocyte","NK","pDC","T","T_Cytotoxic","T_Regulatory",
                         "Fibroblast","Mesothelial","Myofibroblast","Pericyte","SMC",
                         "Lymphatic","VE_Arterial","VE_Capillary_A","VE_Capillary_B","VE_Peribronchial","VE_Venous")

Visualizing the starting atlas:

# Ipf cell atlas
merge <- subset(merge, idents='Multiplet',invert=T)
merge <- ScaleData(merge)
merge <- FindVariableFeatures(merge)
merge <- RunPCA(merge)
#ElbowPlot(merge)
merge <- RunUMAP(merge,dims = 1:50)
merge$Manuscript_Identity <- factor(merge$Manuscript_Identity,levels = c("Aberrant_Basaloid","ATI","ATII","Basal" ,"Ciliated","Club",  "Goblet","Ionocyte","PNEC",  
                                                                         "B","B_Plasma","cDC1","cDC2","cMonocyte","DC_Langerhans","DC_Mature","ILC_A","ILC_B",
                                                                         "Macrophage","Macrophage_Alveolar","Mast","ncMonocyte","NK","pDC","T","T_Cytotoxic","T_Regulatory",
                                                                         "Fibroblast","Mesothelial","Myofibroblast","Pericyte","SMC",
                                                                         "Lymphatic","VE_Arterial","VE_Capillary_A","VE_Capillary_B","VE_Peribronchial","VE_Venous"))
DimPlot(merge,group.by = 'Manuscript_Identity',cols = col.pal$Type,pt.size = 1,shuffle = T)+ggtitle('IPF Cell Atlas')+
  theme(plot.title=element_text(hjust=0))+
  theme(plot.title = element_text(size = 30, face = "bold"))+
  DarkTheme()+
  guides(color=guide_legend(ncol =1,override.aes = list(size=5)))+
  NoAxes()

Now we are ready to compute the NICHES information and begin investigating:

1. Split by sample

split <- SplitObject(merge,split.by = 'Subject_Identity')

2. Impute by sample

options(warn = 1)
for(i in 1:length(split)){
  split[[i]] <- RunALRA(split[[i]])
  gc()
}
# Save imputed data
gc()
save(split,file='control.ipf.split.imputed.Robj')
load('control.ipf.split.imputed.Robj')

2b. Add Cell Class Metadata

names(table(Idents(merge)))
for(i in 1:length(split)){
Idents(split[[i]]) <- split[[i]]$Manuscript_Identity
split[[i]] <- RenameIdents(split[[i]],c("Aberrant_Basaloid"='Epithelium',
                                          "ATI"='Epithelium',
                                          "ATII"='Epithelium',                 
                                          "B"='Immune',           
                                          "B_Plasma"='Immune',         
                                          "Basal"='Epithelium'  ,              
                                          "cDC1"='Immune',        
                                          "cDC2"='Immune',       
                                          "Ciliated"='Epithelium',           
                                          "Club"='Epithelium',         
                                          "cMonocyte"='Immune',    
                                          "DC_Langerhans"='Immune',   
                                          "DC_Mature"='Immune',  
                                          "Fibroblast"='Mesenchyme',     
                                          "Goblet"='Epithelium',    
                                          "ILC_A"='Immune',
                                          "ILC_B"='Immune' ,              
                                          "Ionocyte"='Epithelium',            
                                          "Lymphatic"='Endothelium',           
                                          "Macrophage"='Immune'     ,    
                                          "Macrophage_Alveolar"='Immune', 
                                          "Mast"='Immune'           ,
                                          "Mesothelial"='Mesenchyme' ,          
                                          "Myofibroblast"='Mesenchyme',      
                                          "ncMonocyte"='Immune',          
                                          "NK"='Immune',         
                                          "pDC"='Immune',        
                                          "Pericyte"='Mesenchyme' ,           
                                          "PNEC"='Epithelium',         
                                          "SMC"='Mesenchyme',         
                                          "T"='Immune',    
                                          "T_Cytotoxic"='Immune',   
                                          "T_Regulatory"='Immune',  
                                          "VE_Arterial"='Endothelium'   ,     
                                          "VE_Capillary_A"='Endothelium' ,     
                                          "VE_Capillary_B"='Endothelium'  ,    
                                          "VE_Peribronchial"='Endothelium' ,   
                                          "VE_Venous"='Endothelium'))
split[[i]]$CellClass <- Idents(split[[i]])
Idents(split[[i]]) <- split[[i]]$Manuscript_Identity
}
names(split[[i]]@meta.data)

3. Run NICHES, with all three outputs selected

We compute SystemToCell, CellToSystem, and CellToCell independently. We ask NICHES to use the ALRA-imputed data slot in this instance, and to map against the FANTOM5 database of known logand-receptor interactions. We tell NICHES to sample all cell type crosses based on the “Manuscript_Identity” slot.

scc.list <- list()
for(i in 1:length(split)){
  print(i)
  scc.list[[i]] <- RunNICHES(split[[i]],
                             LR.database="fantom5",
                             species="human",
                             assay="alra",
                             cell_types = "Manuscript_Identity",
                             meta.data.to.map = names(split[[i]]@meta.data),
                             SystemToCell = T,
                             CellToCell = T,
                             CellToSystem=T)
}
names(scc.list) <- names(split)     

4. Merge outputs together to yield a single object with each NICHES data output type

temp.list <- list()
for(i in 1:length(scc.list)){
  temp.list[[i]] <- scc.list[[i]]$CellToCell # Isolate CellToCell Signaling
}
cell.to.cell <- merge(temp.list[[1]],temp.list[2:length(temp.list)])
cell.to.cell
temp.list <- list()
for(i in 1:length(scc.list)){
  temp.list[[i]] <- scc.list[[i]]$SystemToCell # Isolate SystemToCell Signaling
}
system.to.cell <- merge(temp.list[[1]],temp.list[2:length(temp.list)])
system.to.cell
temp.list <- list()
for(i in 1:length(scc.list)){
  temp.list[[i]] <- scc.list[[i]]$CellToSystem # Isolate CellToSystem Signaling
}
cell.to.system <- merge(temp.list[[1]],temp.list[2:length(temp.list)])
cell.to.system
save(cell.to.cell,file='cell.to.cell.Robj')

Let’s load the CellToCell data output type for further analysis in this vignette, as it tends to be the easiest to interpret:

load('cell.to.cell.Robj')
cell.to.cell
## An object of class Seurat 
## 2346 features across 1071580 samples within 1 assay 
## Active assay: CellToCell (2346 features, 0 variable features)

5. Clean NICHES data

This step is essential to getting clean embeddings, clusterings, and downstream data analysis in later steps

#table(Idents(cell.to.cell))
#table(Idents(cell.to.cell),cell.to.cell$Subject_Identity.Joint)
VlnPlot(cell.to.cell,
        features = 'nFeature_CellToCell',
        group.by = 'Disease_Identity.Joint',
        pt.size=0,log = T)

VlnPlot(cell.to.cell,
        features = 'nFeature_CellToCell',
        group.by = 'Subject_Identity.Joint',
        pt.size=0,log = T)+NoLegend()

From the above, let’s set a cutoff saying tht we only want to consider cell-cell crosses with at least 100 unique features (ligand-receptor mechanisms). We are also not interested in Multiplets as either a sending or a receiving cell. Saving this as a temporary file so can load quickly later.

#table(cell.to.cell$SendingType)
cell.to.cell <- subset(cell.to.cell,subset=SendingType=='Multiplet',invert=T)
#table(cell.to.cell$SendingType)
cell.to.cell <- subset(cell.to.cell,subset=ReceivingType=='Multiplet',invert=T)
#table(cell.to.cell$ReceivingType)
temp <- subset(cell.to.cell,nFeature_CellToCell>100) # Choose this limit based on the above violin plots or similar
temp
save(temp,file = 'ipf.niches.temp.Robj')
gc()
load('ipf.niches.temp.Robj')
VlnPlot(temp,
        features = 'nFeature_CellToCell',
        group.by = 'Disease_Identity.Joint',
        pt.size=0,log = T)

# VlnPlot(temp,
#         features = 'nFeature_CellToCell',
#         group.by = 'Subject_Identity.Joint',
#         pt.size=0,log = T)+NoLegend()

Now that the data is cleaned (limited to high-information crosses) we are ready to embed, cluster, and visualize our NICHES data so that we may identify data-set relevant patterns of interest:

6. Visualization

Perform PCA

temp <- ScaleData(temp)
temp <- FindVariableFeatures(temp)
temp <- RunPCA(temp,npcs = 100)
ElbowPlot(temp,ndim=100)

PCHeatmap(temp,balanced=T,cells=200,dims=1:9)

PCHeatmap(temp,balanced=T,cells=200,dims=10:18)

PCHeatmap(temp,balanced=T,cells=200,dims=19:27)

PCHeatmap(temp,balanced=T,cells=200,dims=28:36)

Visualize a trial embedding:

temp <- RunUMAP(temp,dims = 1:30)
DimPlot(temp,raster = FALSE)+NoLegend()

Looks reasonable. Let’s look at how the metadata maps within this embedding:

# Total connectomics
p1 <- DimPlot(temp,group.by = 'Disease_Identity.Sending',shuffle = T,raster = F,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')
p2 <- DimPlot(temp,group.by = 'Subject_Identity.Sending',shuffle = T,raster = F)+NoLegend()+NoAxes()+ggtitle('Subject Identity')
p3 <- DimPlot(temp,group.by = 'CellClass.Sending',shuffle = T,label = F,raster = F,cols = col.pal$Class)+NoAxes()+ggtitle('Sending Cell Class')
p4 <- DimPlot(temp,group.by = 'CellClass.Receiving',shuffle = T,label = F,raster = F,cols = col.pal$Class)+NoAxes()+ggtitle('Receiving Cell Class')

cowplot::plot_grid(p1,p2,p3,p4,align = T)

This looks good. Let’s break this down into individual class-class crosses and look for cell-cell signaling archetypes that are either enriched or lost in IPF vs. Control.

Epi-Mes

epi.mes <- subset(temp,subset=CellClass.Sending=='Epithelium')
epi.mes <- subset(epi.mes,subset=CellClass.Receiving=='Mesenchyme')
epi.mes <- subset(epi.mes,subset=CellClass.Sending!='Multiplet') # Just a precaution
table(epi.mes$CellClass.Joint)
## 
## Epithelium - Mesenchyme 
##                    6142
epi.mes <- ScaleData(epi.mes)
epi.mes <- FindVariableFeatures(epi.mes)
epi.mes <- RunPCA(epi.mes)
ElbowPlot(epi.mes,ndims = 50)

epi.mes <- RunUMAP(epi.mes,dims = 1:10)
epi.mes <- FindNeighbors(epi.mes,dims = 1:10)
epi.mes <- FindClusters(epi.mes,resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6142
## Number of edges: 186998
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8185
## Number of communities: 10
## Elapsed time: 0 seconds
p1 <- DimPlot(epi.mes,group.by = 'Disease_Identity.Sending',shuffle = T)
p2 <- DimPlot(epi.mes,shuffle = T)+ggplot2::ggtitle('Signaling Archetype')
p3 <- DimPlot(epi.mes,group.by = 'Manuscript_Identity.Sending',shuffle = T)
p4 <- DimPlot(epi.mes,group.by = 'Manuscript_Identity.Receiving',shuffle = T)
cowplot::plot_grid(p1,p2,p3,p4)

It looks like cluster 7 is enriched in IPF and is associated with aberrant basaloid cells. Let’s see some mechanisms that are associated with this signaling archetype:

mark.epi.mes <- FindMarkers(epi.mes,ident.1 = '7',only.pos = T)
mark.epi.mes$ratio <- mark.epi.mes$pct.1/mark.epi.mes$pct.2
data.table::setorderv(mark.epi.mes,'ratio',order = -1)
knitr::kable(mark.epi.mes[1:20,])
p_val avg_log2FC pct.1 pct.2 p_val_adj ratio
IL11—IL11RA 0 0.5870075 0.250 0.015 0 16.666667
FGF2—FGFR4 0 0.2612598 0.102 0.007 0 14.571429
CGB8—SIRPA 0 0.3850679 0.145 0.010 0 14.500000
PDGFB—PDGFRB 0 1.0384244 0.375 0.029 0 12.931035
PDGFB—LRP1 0 1.3886025 0.398 0.031 0 12.838710
IGF2—IGF2R 0 1.3062871 0.385 0.030 0 12.833333
EFNA4—EPHA7 0 0.3057615 0.140 0.012 0 11.666667
CXCL12—ACKR3 0 0.3264253 0.105 0.009 0 11.666667
SEMA7A—PLXNC1 0 0.6341920 0.173 0.015 0 11.533333
FGF2—NRP1 0 1.3083440 0.355 0.031 0 11.451613
LTB—TNFRSF1A 0 2.4686443 0.355 0.032 0 11.093750
PDGFB—ITGAV 0 0.8274260 0.281 0.026 0 10.807692
LTB—LTBR 0 1.4872515 0.214 0.020 0 10.700000
LTB—CD40 0 0.7005923 0.107 0.010 0 10.700000
SEMA7A—ITGA1 0 1.8054061 0.556 0.052 0 10.692308
IGF2—INSR 0 0.8759508 0.212 0.020 0 10.600000
IGF2—IGF1R 0 1.4794769 0.327 0.031 0 10.548387
FGF2—FGFR1 0 1.6137264 0.429 0.043 0 9.976744
SEMA7A—ITGB1 0 2.0642711 0.793 0.081 0 9.790123
FGF2—SDC2 0 1.1186209 0.319 0.034 0 9.382353

We can visualize chosen markers within the embedding if we choose, as follows:

p5 <- FeaturePlot(epi.mes,'WNT7A—LRP6',order = T,pt.size = 1,max.cutoff = 6)
p6 <- FeaturePlot(epi.mes,'PDGFB—PDGFRB',order = T,pt.size = 1,max.cutoff = 6)
cowplot::plot_grid(p5,p6,nrow=1)

Putting this all together as a small multiple with nice colors:

p1 <- DimPlot(epi.mes,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.mes,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.mes,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.mes,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p5 <- FeaturePlot(epi.mes,'WNT7A—LRP6',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.mes,'PDGFB—PDGFRB',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))

cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)

Alternatively, we can create a complex heatmap which shows a great deal of information at once, something like as follows (this uses a custom build ‘ArchetypeHeatmap’ function which is a NICHES-generalized wrapper for ComplexHeatmap):

ArchetypeHeatmap(epi.mes,
                 primary = 'seurat_clusters' ,
                 secondary = 'Manuscript_Identity.Sending' ,
                 tertiary = 'Manuscript_Identity.Receiving' ,
                 quarternary = 'Disease_Identity.Sending' ,
                 primary.cols = NULL,
                 secondary.cols = col.pal$Type, # Need to be a named list of colors
                 tertiary.cols = col.pal$Type,
                 quarternary.cols = col.pal$Disease,
                 save.markers = T,
                 return.markers = T,
                 MOI.by = 'ratio',
                 labels = c('Signaling Archetype','Sending Cell Type','Receiving Cell Type','Condition'),
                 pt.size = 0.5,
                 return.plot = F,
                 print.plot = F,
                 filetag = 'IPF_NICHE_Epi.Mes',
                 exclude = 'ITGA|ITGB|LRP1') 

### We can save our work for later as follows:

# save(epi.mes,file = 'epi.mes.Robj')
# save(mark.epi.mes,file = 'mark.epi.mes.Robj')

Now that we have a standardized workflow, let’s turn our attention to the other (15) class-class crosses, each in turn:

Epi-End

epi.end <- subset(temp,subset=CellClass.Sending=='Epithelium')
epi.end <- subset(epi.end,subset=CellClass.Receiving=='Endothelium')
epi.end <- subset(epi.end,subset=CellClass.Sending!='Multiplet')
table(epi.end$CellClass.Joint)
## 
## Epithelium - Endothelium 
##                     5276
epi.end <- ScaleData(epi.end)
epi.end <- FindVariableFeatures(epi.end)
epi.end <- RunPCA(epi.end)
ElbowPlot(epi.end,ndims = 50)

epi.end <- RunUMAP(epi.end,dims = 1:8)
epi.end <- FindNeighbors(epi.end,dims = 1:8)
epi.end <- FindClusters(epi.end,resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5276
## Number of edges: 158742
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8360
## Number of communities: 8
## Elapsed time: 0 seconds
p1 <- DimPlot(epi.end,group.by = 'Disease_Identity.Sending')
p2 <- DimPlot(epi.end)+ggplot2::ggtitle('Signaling Archetype')
p3 <- DimPlot(epi.end,group.by = 'Manuscript_Identity.Sending')
p4 <- DimPlot(epi.end,group.by = 'Manuscript_Identity.Receiving')
cowplot::plot_grid(p1,p2,p3,p4)

It looks like cluster 4 is enriched in IPF. Let’s investigate further:

mark.epi.end <- FindMarkers(epi.end,ident.1 = '4',only.pos = T)
mark.epi.end$ratio <- mark.epi.end$pct.1/mark.epi.end$pct.2
data.table::setorderv(mark.epi.end,'ratio',order = -1)
knitr::kable(mark.epi.end[1:20,])
p_val avg_log2FC pct.1 pct.2 p_val_adj ratio
CSF2—CSF2RB 0 0.7381961 0.141 0.016 0 8.812500
LTB—CD40 0 1.1563510 0.113 0.013 0 8.692308
TNFSF15—TNFRSF25 0 0.2817680 0.100 0.012 0 8.333333
MFGE8—PDGFRB 0 0.5416215 0.130 0.019 0 6.842105
DLL4—NOTCH2 0 0.2708609 0.126 0.019 0 6.631579
LTB—LTBR 0 1.4476810 0.142 0.022 0 6.454546
TNF—LTBR 0 0.4129545 0.106 0.017 0 6.235294
FGF1—EGFR 0 0.4543708 0.187 0.030 0 6.233333
COL4A1—ITGA2 0 0.9069791 0.180 0.029 0 6.206897
COL6A2—ITGA2 0 0.9548353 0.138 0.023 0 6.000000
BMP2—BMPR1A 0 0.5115480 0.162 0.027 0 6.000000
IGF2—IGF2R 0 0.8470069 0.177 0.030 0 5.900000
PLAU—ITGB2 0 0.7596012 0.128 0.022 0 5.818182
EFNB2—RHBDL2 0 0.7260124 0.248 0.043 0 5.767442
COL5A1—SDC3 0 0.3803499 0.119 0.021 0 5.666667
FGF2—FGFR1 0 0.7229520 0.158 0.028 0 5.642857
BMP2—BMPR1B 0 0.4650717 0.128 0.023 0 5.565217
IGF2—INSR 0 1.4002259 0.165 0.030 0 5.500000
FGF1—CD44 0 0.5280894 0.141 0.026 0 5.423077
VCAN—EGFR 0 1.5668954 0.241 0.045 0 5.355556

Picking a few markers of interest:

p5 <- FeaturePlot(epi.end,'FGF2—FGFR1',order = T,pt.size = 1,max.cutoff = 6)
p6 <- FeaturePlot(epi.end,'BMP5—ACVR2A',order = T,pt.size = 1,max.cutoff = 4)
cowplot::plot_grid(p5,p6)

We can then put all together:

p1 <- DimPlot(epi.end,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.end,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.end,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.end,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p5 <- FeaturePlot(epi.end,'FGF2—FGFR1',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.end,'BMP5—ACVR2A',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)

Saving our work for later:

save(mark.epi.end,file = 'mark.epi.end.Robj')
save(epi.end,file = 'epi.end.Robj')

Epi-Epi

Epithelial autocrine signalinqg:

epi.epi <- subset(temp,subset=CellClass.Sending=='Epithelium')
epi.epi <- subset(epi.epi,subset=CellClass.Receiving=='Epithelium')
epi.epi <- subset(epi.epi,subset=CellClass.Sending!='Multiplet')
table(epi.epi$CellClass.Joint)
## 
## Epithelium - Epithelium 
##                   18436
epi.epi <- ScaleData(epi.epi)
epi.epi <- FindVariableFeatures(epi.epi)
epi.epi <- RunPCA(epi.epi)
ElbowPlot(epi.epi,ndims = 50)

epi.epi <- RunUMAP(epi.epi,dims = 1:8)
epi.epi <- FindNeighbors(epi.epi,dims = 1:8)
epi.epi <- FindClusters(epi.epi,resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 18436
## Number of edges: 544690
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8841
## Number of communities: 9
## Elapsed time: 2 seconds
p1 <- DimPlot(epi.epi,group.by = 'Disease_Identity.Sending')
p2 <- DimPlot(epi.epi)+ggplot2::ggtitle('Signaling Archetype')
p3 <- DimPlot(epi.epi,group.by = 'Manuscript_Identity.Sending')
p4 <- DimPlot(epi.epi,group.by = 'Manuscript_Identity.Receiving')
cowplot::plot_grid(p1,p2,p3,p4)

Epi-epi autocrine marker mechanisms specific to IPF / aberrant basaloids (cluster 2):

mark.epi.epi <- FindMarkers(epi.epi,ident.1 = '2',only.pos = T)
mark.epi.epi$ratio <- mark.epi.epi$pct.1/mark.epi.epi$pct.2
data.table::setorderv(mark.epi.epi,'ratio',order = -1)
knitr::kable(mark.epi.epi[1:20,])
p_val avg_log2FC pct.1 pct.2 p_val_adj ratio
IL23A—IL12RB1 0 0.3158816 0.108 0.006 0 18.000000
COL5A1—SDC3 0 0.4062673 0.118 0.007 0 16.857143
SEMA7A—ITGA1 0 0.5935414 0.166 0.010 0 16.600000
EFNB1—EPHB6 0 0.4124380 0.178 0.012 0 14.833333
EFNB1—EPHB3 0 0.4473471 0.207 0.014 0 14.785714
FGF2—FGFR3 0 0.3041765 0.118 0.008 0 14.750000
COL6A2—ITGA1 0 1.1209605 0.180 0.013 0 13.846154
FGF2—NRP1 0 0.4402414 0.122 0.009 0 13.555556
MFAP2—NOTCH1 0 0.4014025 0.149 0.011 0 13.545454
PLAU—ITGA5 0 0.4948389 0.106 0.008 0 13.250000
EFNA4—EPHA1 0 0.3796532 0.196 0.015 0 13.066667
TNF—TNFRSF21 0 0.6304780 0.102 0.009 0 11.333333
NOV—PLXNA1 0 0.3509425 0.101 0.009 0 11.222222
PGF—NRP1 0 0.5190593 0.140 0.013 0 10.769231
FGF2—SDC1 0 0.9638212 0.198 0.019 0 10.421053
COL5A1—ITGA1 0 0.5932909 0.164 0.016 0 10.250000
PDGFB—ITGAV 0 0.9152901 0.150 0.015 0 10.000000
EFNB1—EPHB4 0 0.5906749 0.249 0.025 0 9.960000
COL18A1—KDR 0 1.1758647 0.254 0.027 0 9.407407
WNT7A—FZD5 0 0.4435647 0.176 0.019 0 9.263158

Finding a few autocrine mechanisms of interest that seen disease relevant:

p5 <- FeaturePlot(epi.epi,'WNT7A—FZD5',order = T,pt.size = 1,max.cutoff = 6)
p6 <- FeaturePlot(epi.epi,'NOV—NOTCH1',order = T,pt.size = 1,max.cutoff = 6)
cowplot::plot_grid(p5,p6)

Epi-epi small multiple:

p1 <- DimPlot(epi.epi,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.epi,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.epi,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.epi,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p5 <- FeaturePlot(epi.epi,'WNT7A—FZD5',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.epi,'NOV—NOTCH1',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)

Saving for later:

save(mark.epi.epi,file = 'mark.epi.epi.Robj')
save(epi.epi,file = 'epi.epi.Robj')

Epi-Imm

Now let’s look at epithelium-immune signaling specifically:

epi.imm <- subset(temp,subset=CellClass.Sending=='Epithelium')
epi.imm <- subset(epi.imm,subset=CellClass.Receiving=='Immune')
epi.imm <- subset(epi.imm,subset=CellClass.Sending!='Multiplet')
table(epi.imm$CellClass.Joint)
## 
## Epithelium - Immune 
##               17904
epi.imm <- ScaleData(epi.imm)
epi.imm <- FindVariableFeatures(epi.imm)
epi.imm <- RunPCA(epi.imm)
ElbowPlot(epi.imm,ndims = 50)

epi.imm <- RunUMAP(epi.imm,dims = 1:30)
epi.imm <- FindNeighbors(epi.imm,dims = 1:30)
epi.imm <- FindClusters(epi.imm,resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 17904
## Number of edges: 616654
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8788
## Number of communities: 15
## Elapsed time: 2 seconds
p1 <- DimPlot(epi.imm,group.by = 'Disease_Identity.Sending')
p2 <- DimPlot(epi.imm)+ggplot2::ggtitle('Signaling Archetype')
p3 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Sending')
p4 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Receiving')
cowplot::plot_grid(p1,p2,p3,p4)

Let’s find some marker for cluster 5 which is driven predominantly by aberrant basaloids:

mark.epi.imm <- FindMarkers(epi.imm,ident.1 = '5',only.pos = T)
mark.epi.imm$ratio <- mark.epi.imm$pct.1/mark.epi.imm$pct.2
data.table::setorderv(mark.epi.imm,'ratio',order = -1)
knitr::kable(mark.epi.imm[1:20,])
p_val avg_log2FC pct.1 pct.2 p_val_adj ratio
CGB8—SIRPA 0 1.4009639 0.316 0.023 0 13.739130
LTB—CD40 0 1.2700641 0.144 0.013 0 11.076923
PDGFB—LRP1 0 0.4654225 0.166 0.015 0 11.066667
PDGFB—ITGAV 0 0.5504246 0.231 0.021 0 11.000000
LTB—LTBR 0 1.1479867 0.141 0.014 0 10.071429
LTB—TNFRSF1A 0 2.0525071 0.281 0.028 0 10.035714
IGF2—INSR 0 1.0476898 0.282 0.032 0 8.812500
SEMA7A—PLXNC1 0 1.0434091 0.358 0.045 0 7.955556
TNF—TNFRSF1A 0 0.7465077 0.224 0.029 0 7.724138
TNF—LTBR 0 0.2675594 0.100 0.013 0 7.692308
COL6A2—ITGB1 0 2.3063884 0.760 0.102 0 7.450980
SEMA7A—ITGB1 0 1.6172146 0.682 0.092 0 7.413043
FGF2—SDC2 0 1.0503114 0.229 0.031 0 7.387097
CXCL12—ITGB1 0 0.4788986 0.216 0.030 0 7.200000
FGF2—SDC4 0 0.4202222 0.144 0.020 0 7.200000
CXCL12—CD4 0 0.3060225 0.128 0.018 0 7.111111
IGF2—IGF2R 0 1.2964819 0.415 0.059 0 7.033898
SERPINE1—ITGAV 0 2.2619087 0.365 0.052 0 7.019231
IGFL1—IGFLR1 0 1.3580303 0.215 0.032 0 6.718750
SERPINE1—ITGB5 0 1.2895377 0.141 0.021 0 6.714286

A couple mechanisms of interest (TNF-C and TNF-A!)

p5 <- FeaturePlot(epi.imm,'LTB—LTBR',order = T,pt.size = 1,max.cutoff = 30) #TNF-C
p6 <- FeaturePlot(epi.imm,'TNF—TNFRSF1A',order = T,pt.size = 1,max.cutoff = 15)
cowplot::plot_grid(p5,p6)

Small multiple:

# Epi-Imm
p1 <- DimPlot(epi.imm,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.imm,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=4,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))+theme(legend.text = element_text(size=8))
p5 <- FeaturePlot(epi.imm,'LTB—LTBR',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.imm,'TNF—TNFRSF1A',order = T,pt.size = 1,max.cutoff = 6,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)

Saving our work for later:

save(mark.epi.imm,file = 'mark.epi.imm.Robj')
save(epi.imm,file = 'epi.imm.Robj')

We can also go fishing for specific markers. Here are two epithelial-associated cytokines that came up in talks at ATS. We can quickly query these within a given object and ask ‘who is listening’:

p1 <- DimPlot(epi.imm,raster = F,pt.size = 1)+ggplot2::ggtitle('Signaling Archetype')+NoAxes()+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p2 <- DimPlot(epi.imm,group.by = 'Disease_Identity.Sending',raster = F,shuffle = T,pt.size = 1,cols = col.pal$Disease)+NoAxes()+ggtitle('Disease Identity')+ theme(legend.position="top")+guides(color=guide_legend(nrow=2,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p3 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Sending',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Sending Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=3,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))
p4 <- DimPlot(epi.imm,group.by = 'Manuscript_Identity.Receiving',raster = F,pt.size = 1,cols = col.pal$Type)+NoAxes()+ggtitle('Receiving Cell Type')+ theme(legend.position="top")+guides(color=guide_legend(nrow=4,override.aes = list(size=5),byrow=TRUE))+theme(plot.title=element_text(hjust=0))+theme(legend.text = element_text(size=8))
p5 <- FeaturePlot(epi.imm,'IL11—IL6ST',order = T,pt.size = 0.5,max.cutoff = 30,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
p6 <- FeaturePlot(epi.imm,'CCL2—CCR1',order = T,pt.size = 0.5,max.cutoff = 30,raster = F)+NoAxes()+ theme(legend.position="top")+theme(plot.title=element_text(hjust=0))
cowplot::plot_grid(p1,p2,p3,p4,p5,p6,nrow = 1,align = T)

Thoughts?

Do we like this? Does it align well with other work? Suggestions for improvement?